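# Data preparation: choose a random sample of 2000 crimes that happened during 2000-2022.
# NOTE(review): the file loaded below is named "..._from_2020_to_Present" and no
# random-sampling step is visible in this script -- confirm the description above.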
library(readr)
library(DT)
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
# Load the raw crime export from its fixed local path.
crime <- read.csv(
  "/Users/apple/Desktop/dv/final/Crime_Data_from_2020_to_Present (1).csv"
)
# Clean version of the total dataset: keep only the columns named in `col`.
# Names in `col` that are not present in `crime` are silently ignored by the
# membership test below.
col <- c(
  "DR_NO", "Date.Rptd", "Date.occ", "TIME.OCC", "AREA", "AREA.NAME",
  "Rpt.Dist.No", "Crm.Cd.Desc", "Mocodes", "Vict.Age", "Vict.Sex",
  "Vict.Descent", "Premis.Desc", "Weapon.Desc", "Status.Desc", "LOCATION",
  "LAT", "LON"
)
clean_total <- crime[, is.element(colnames(crime), col)]
# Store the occurrence time as text rather than as a number.
clean_total[["TIME.OCC"]] <- as.character(clean_total[["TIME.OCC"]])
# Convert ALL-CAPS words to Proper Case.
#
# x: a vector of strings (other types are coerced to character by gsub()).
# Returns a character vector the same length as `x`. Every run of two or more
# ASCII capitals that starts at a word boundary keeps its first letter
# upper-case and has the rest of the run lower-cased; anything else is left
# untouched.
proper_case <- function(x) {
  caps_run <- "\\b([A-Z])([A-Z]+)"
  # \U upper-cases the first captured letter, \L lower-cases the remainder
  # (Perl-style case conversion, hence perl = TRUE).
  recased <- "\\U\\1\\L\\2"
  gsub(caps_run, recased, x, perl = TRUE)
}
# Run proper_case() over each of the seven listed columns in place.
# proper_case() goes through gsub(), so every listed column comes back as a
# character vector.
clean_total <- clean_total %>%
  mutate(across(
    c(
      Crm.Cd.Desc, AREA, AREA.NAME, LOCATION,
      Weapon.Desc, Premis.Desc, Status.Desc
    ),
    proper_case
  ))
# Delete the cases that are still under investigation.
# `sum_ctg` tallies the number of records per case status for inspection;
# `total1` then keeps only the four statuses listed below and drops every
# other status.
sum_ctg <- summarise(group_by(clean_total, Status.Desc), length(Status.Desc))
total1 <- subset(
  clean_total,
  Status.Desc %in% c("Adult Arrest", "Adult Other", "Juv Arrest", "Juv Other")
)
# Aggregate by crime category: number of remaining records per category.
sum_crime <- summarise(group_by(total1, Crm.Cd.Desc), length(Crm.Cd.Desc))
# Select the crimes that happen more than 500 times.
#
# The previous version compared Crm.Cd.Desc with `==` against string literals
# that carried stray leading/trailing spaces and, where a literal was split
# across source lines, embedded line breaks. Those literals could never equal a
# category label, so most of the intended categories were silently dropped.
# The labels are now listed in trimmed form and matched with %in% against a
# whitespace-normalised copy of the column (trimmed, internal runs of
# whitespace collapsed to one space).
# NOTE(review): spellings such as "Vehicle-Stolen" and "Misdeameanor" are kept
# exactly as originally written -- confirm them against the labels in
# `sum_crime`.
frequent_crimes <- c(
  "Intimate Partner - Simple Assault",
  "Battery - Simple Assault",
  "Assault With Deadly Weapon, Aggravated Assault",
  "Vandalism - Felony ($400 & Over, All Church Vandalisms)",
  "Intimate Partner - Aggravated Assault",
  "Criminal Threats - No Weapon Displayed",
  "Robbery",
  "Vehicle-Stolen",
  "Burglary",
  "Violation Of Restraining Order",
  "Brandish Weapon",
  "Vandalism - Misdeameanor ($399 Or Under)",
  "Violation Of Court Order",
  "Theft Plain - Petty ($950 & Under)",
  "Letters, Lewd - Telephone Calls, Lewd",
  "Child Abuse (Physical) - Simple Assault",
  "Theft-Grand ($950.01 & Over)Excpt,Guns,Fowl,Livestk,Prod",
  "Trespassing",
  "Other Miscellaneous Crime",
  "Contempt Of Court",
  "Shoplifting - Petty Theft ($950 & Under)",
  "Attempted Robbery",
  "Battery Police (Simple)",
  "Battery With Sexual Contact",
  "Rape, Forcible",
  "Other Assault",
  "Burglary From Vehicle",
  "Embezzlement, Grand Theft ($950.01 & Over)",
  "Theft Of Identity"
)
clean_total <- subset(
  total1,
  gsub("\\s+", " ", trimws(Crm.Cd.Desc)) %in% frequent_crimes
)
# Use datatable to show the first 100 cases.
# head() returns at most 100 rows; indexing with 1:100 would pad the preview
# with all-NA rows whenever fewer than 100 cases remain after filtering.
# NOTE: the variable name `sample` is kept for compatibility with any later
# code, although it shares its name with base::sample().
sample <- head(clean_total, 100)
datatable(sample, options = list(pageLength = 5, scrollX = "400px"))
# Visualize the crimes: preprocessing
library(tidyverse)
## ─ Attaching packages ──────────────────── tidyverse 1.3.1 ─
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.4 ✓ stringr 1.4.0
## ✓ tidyr 1.1.3 ✓ forcats 0.5.1
## ─ Conflicts ───────────────────── tidyverse_conflicts() ─
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library("lubridate")
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
# Derive the columns used by the map popups:
#   Date     - Date.Rptd parsed with the month/day/year time format below
#   Location - LOCATION with surplus whitespace squeezed out
#   Year     - four-digit year of Date, as text
clean_total <- mutate(
  clean_total,
  Date = as.Date(Date.Rptd, "%m/%d/%Y %H:%M:%S"),
  Location = str_squish(LOCATION),
  Year = format(Date, format = "%Y")
)
# Expose the crime description under the shorter name "Category".
names(clean_total)[names(clean_total) == "Crm.Cd.Desc"] <- "Category"
library(leaflet)
## Warning: package 'leaflet' was built under R version 4.1.2
# Keep only records whose longitude is present and non-zero, so every
# remaining row can be placed on the map. which() discards NA comparisons;
# a bare logical index would turn each NA longitude into an all-NA row.
# NOTE(review): presumably LON == 0 marks a missing location -- confirm.
clean_total <- clean_total[which(clean_total$LON != 0), ]
# Build the HTML shown when a marker is clicked: one labelled line per field.
clean_total$popup <- paste("<br>", "<b>Category: </b>", clean_total$Category,
                           "<br>", "<b>Date: </b>", clean_total$Date,
                           "<br>", "<b>Address: </b>", clean_total$Location,
                           "<br>", "<b>Vict Age: </b>", clean_total$Vict.Age,
                           "<br>", "<b>Vict Sex: </b>", clean_total$Vict.Sex,
                           "<br>", "<b>Longitude: </b>", clean_total$LON,
                           "<br>", "<b>Latitude: </b>", clean_total$LAT)
# Interactive map of the remaining crimes: clustered markers with HTML popups
# and a control for switching between three base maps.
# Each base layer is added exactly once and belongs to a named group, so the
# layer control manages all of them. The earlier extra, ungrouped addTiles()
# call added a second OSM layer that the control could not toggle.
leaflet(clean_total, width = "100%") %>%
  addTiles(group = "OSM (default)") %>%
  addProviderTiles(provider = "Esri.WorldStreetMap", group = "World StreetMap") %>%
  addProviderTiles(provider = "Esri.WorldImagery", group = "World Imagery") %>%
  # addProviderTiles(provider = "NASAGIBS.ViirsEarthAtNight2012",group = "Nighttime Imagery") %>%
  addMarkers(
    lng = ~LON,
    lat = ~LAT,
    popup = ~popup,
    clusterOptions = markerClusterOptions()
  ) %>%
  addLayersControl(
    baseGroups = c("OSM (default)", "World StreetMap", "World Imagery"),
    options = layersControlOptions(collapsed = FALSE)
  )